#homework 4 -> Mateusz Kubita, plotly
import plotly
import pandas
import numpy
import re
import plotly.express as px
# Load the raw sales data from the CSV file.
df = pandas.read_csv('philly_data.csv')
# Keep only the sale date and price columns. The explicit .copy() makes df2 an
# independent frame, so the column assignments performed on it later do not
# trigger pandas' SettingWithCopyWarning.
df2 = df[['Sale Date', 'Sale Price/bid price']].copy()
# Notebook-style inspection of the selected column names.
df2.columns
Index(['Sale Date', 'Sale Price/bid price'], dtype='object')
# Shorter working names for the two columns ('kwota' is Polish for "amount").
column_names = {'Sale Date': 'date', 'Sale Price/bid price': 'kwota'}
df2 = df2.rename(columns=column_names)
df2.head()
| | date | kwota |
|---|---|---|
| 0 | September 13 2016 | $11,400 |
| 1 | August 2 2016 | $8,500 |
| 2 | August 2 2016 | $12,600 |
| 3 | August 2 2016 | $9,200 |
| 4 | August 2 2016 | $8,900 |
def split_it(cena):
    """Extract the numeric part of a price string such as ``"$11,400"``.

    Args:
        cena: Price text containing digits, optionally grouped with
            thousands commas and optionally followed by a decimal part.

    Returns:
        The first number found in ``cena`` as a string, with its commas
        kept (e.g. ``"11,400"``).

    Raises:
        IndexError: If ``cena`` contains no digits at all.
    """
    # Raw string avoids invalid-escape warnings. The pattern accepts any
    # number of comma-separated groups, so "$500" and "$1,200,000" are both
    # matched in full instead of failing or being truncated.
    return re.findall(r"\d+(?:,\d+)*(?:\.\d+)?", cena)[0]
# Convert both columns to pandas' string dtype so missing values become NA.
string_dtype = pandas.StringDtype()
for column in ('kwota', 'date'):
    df2[column] = df2[column].astype(string_dtype)
# Drop rows with a missing date or amount before parsing the prices.
df2.dropna(inplace=True)
# Strip the currency symbol, keeping only the numeric text of each price.
df2['kwota'] = df2['kwota'].apply(split_it)
df2.head()
| | date | kwota |
|---|---|---|
| 0 | September 13 2016 | 11,400 |
| 1 | August 2 2016 | 8,500 |
| 2 | August 2 2016 | 12,600 |
| 3 | August 2 2016 | 9,200 |
| 4 | August 2 2016 | 8,900 |
# Remove the thousands separators, then convert the amounts to floats.
amount_without_commas = df2['kwota'].str.replace(',', '')
df2['kwota'] = amount_without_commas.astype(float)
# Notebook-style inspection of the distinct sale dates.
df2['date'].unique()
<StringArray>
['September 13 2016', 'August 2 2016', 'October 4 2016',
'March 7 2017', 'February 7 2017']
Length: 5, dtype: string
# Compute the mean amount for each sale date.
grouped_by_date = df2.groupby(['date'])
df3 = grouped_by_date.mean().reset_index()
df3
| | date | kwota |
|---|---|---|
| 0 | August 2 2016 | 53231.379310 |
| 1 | February 7 2017 | 53500.000000 |
| 2 | March 7 2017 | 57300.000000 |
| 3 | October 4 2016 | 62235.849057 |
| 4 | September 13 2016 | 59883.720930 |
# Month name -> month number. All twelve months are listed so that a sale date
# in any month maps to a number instead of silently becoming NaN.
d = {
    'January': 1,
    'February': 2,
    'March': 3,
    'April': 4,
    'May': 5,
    'June': 6,
    'July': 7,
    'August': 8,
    'September': 9,
    'October': 10,
    'November': 11,
    'December': 12,
}
# Notebook-style inspection of df3's column dtypes; as a bare expression it
# has no effect when the file is run as a script.
df3.dtypes
date      string
kwota    float64
dtype: object
# Split each date on runs of whitespace (no separator argument), so the result
# does not depend on how many spaces separate the parts of the raw date text.
date_parts = df3['date'].str.split()
# The month name is the first token and the year is the last one; indexing
# from the end avoids relying on a fixed token count.
df3['month'] = date_parts.str[0]
df3['year'] = date_parts.str[-1]
# Replace the month name with its number using the lookup table `d`.
df3.month = df3.month.map(d)
df3.dtypes
date      string
kwota    float64
month      int64
year      object
dtype: object
# Order the rows chronologically: by year first, then by month number.
df3 = df3.sort_values(by=['year', 'month'])
# Rebuild the date column as a "<year> <month number>" label.
year_label = df3['year'].astype("string")
month_label = df3['month'].astype("string")
df3['date'] = year_label + " " + month_label
df3 = df3.reset_index(drop=True)
df3
| | date | kwota | month | year |
|---|---|---|---|---|
| 0 | 2016 8 | 53231.379310 | 8 | 2016 |
| 1 | 2016 9 | 59883.720930 | 9 | 2016 |
| 2 | 2016 10 | 62235.849057 | 10 | 2016 |
| 3 | 2017 2 | 53500.000000 | 2 | 2017 |
| 4 | 2017 3 | 57300.000000 | 3 | 2017 |
# Line chart of the mean amount per date label, with a marker at each point.
line_options = dict(
    x='date',
    y="kwota",
    color_discrete_sequence=['navy'],
    title="Średnia cena nieruchomości w Filadelfii",
    markers=True,
)
fig = px.line(df3, **line_options)
fig.show()
# Unfortunately this data contains only 5 dates, which makes the chart look
# rather sparse. With a larger data set the chart would be richer and we could
# identify an interesting trend in Philadelphia property prices over time.